In [1]:
import pandas as pd
import networkx as netx
import numpy as np
import matplotlib.pyplot as plt
import warnings
import random
import itertools
In [2]:
dataset = pd.read_csv(
".\\data\\CA-GrQc-data.txt",
header=None,
names=["source","target"],
sep="\t",
comment="#")
G = netx.from_pandas_dataframe(dataset,"source","target")
In [3]:
#### remove all self loops
G.remove_edges_from(G.selfloop_edges())
In [4]:
def graph_walk(nodes):
graph = netx.Graph()
p = None
for n in nodes:
graph.add_node(n)
if p != None:
graph.add_edge(n,p)
p = n
return graph
def random_walk(graph, start_node = None, size = -1):
if start_node == None:
start_node = random.choice(list(graph.nodes()))
v = start_node
for c in itertools.count():
if size == c:
return
v = random.choice(list(graph.neighbors(v)))
yield v
In [5]:
walk = list(random_walk(G, size = 20))
In [6]:
plt.axis('off')
graph = graph_walk(walk)
pos=netx.spring_layout(graph)
netx.draw_networkx(graph, pos=pos, with_labels=True)
plt.show()
In [7]:
def graph_jumps(collections):
graph = netx.Graph()
for collection in collections:
p = None
for n in collection:
graph.add_node(n)
if p != None:
graph.add_edge(n,p)
p = n
return graph
def random_walk_with_jump(graph, start_node = None, size = -1, p = .2):
if start_node == None:
start_node = random.choice(list(graph.nodes()))
ev = []
v = start_node
col = [v]
for c in itertools.count():
if size == c:
break
if random.random() < p:
ev.append(col)
v = random.choice(list(graph.nodes()))
col = [v]
else:
v = random.choice(list(graph.neighbors(v)))
col.append(v)
ev.append(col)
return ev
In [8]:
jumps = random_walk_with_jump(G, size = 25, p = .6)
In [9]:
plt.axis('off')
graph = graph_jumps(jumps)
pos=netx.spring_layout(graph)
netx.draw_networkx(graph, pos=pos, with_labels=True)
plt.show()
In [10]:
def get_degrees(graph, nodes):
lookup = dict(G.degree())
for n in nodes:
yield lookup[n]
def uniform_sample(population, size = -1, with_replacement = True):
objs = list(population)
n = len(objs)
for c in itertools.count():
if size == c:
return
i = int(random.random() * n)
v = objs[i]
if not with_replacement:
del objs[i]
n = n-1
yield v
In [11]:
def print_sample_stats(sample):
n = len(sample)
sample_mu = sample.mean()
sample_std = (((sample - sample_mu)**2).sum() / (n-1))**(1/2)
se_mean = sample_std / (n)**(1/2)
percentile = np.percentile(sample,[2.5,97.5])
print("sample mu:", sample_mu)
print("sample std:", sample_std)
print("standard error:", se_mean)
print()
print("confidence")
print("%",percentile)
In [12]:
iters = 4000
n = 500
Totals = []
for c in range(iters):
samples = list(uniform_sample(G.nodes(), size = n, with_replacement = True))
degrees = np.array(list(get_degrees(G,samples)))
Totals.append(degrees.sum())
T = np.array(Totals) / n
plt.title("Sampled Average Degree")
plt.hist(x=T, bins=25)
plt.show()
In [13]:
print_sample_stats(T)
In [14]:
def get_by_degree(deg, searchBy):
for key, value in deg.items():
if searchBy(value):
yield key
degrees = dict(G.degree())
_max = np.array(list(degrees.values())).max()
stratas = { }
prev = -1
for c in range(4):
cr = c+1
_min = int(_max * (cr/4))
fx = lambda x: x > prev and x <= _min
stratas.update({ cr: list(get_by_degree(degrees,fx)) })
prev = _min
i = 1
iters = 2000
Totals = [ ]
for sn in [20,20,20,20]:
sample = stratas[i]
Totals_sub = []
for c in range(iters):
samples = list(uniform_sample(sample, size = sn, with_replacement = True))
degrees = np.array(list(get_degrees(G,samples)))
Totals_sub.append(degrees.sum())
Totals.append(Totals_sub)
i = i + 1
In [15]:
i = 1
for T in Totals:
T1 = np.array(T) / 20
_ = plt.title("Sampled Average Degree " + str(i))
_ = plt.hist(x=T1, bins=25)
plt.show()
print_sample_stats(T1)
i = i + 1
In [16]:
n = 20
n1 = len(stratas[1])
n2 = len(stratas[2])
n3 = len(stratas[3])
n4 = len(stratas[4])
N = n1 + n2 + n3 + n4
sample_mu1 = (np.array(Totals[0])/n).mean()
sample_mu2 = (np.array(Totals[1])/n).mean()
sample_mu3 = (np.array(Totals[2])/n).mean()
sample_mu4 = (np.array(Totals[3])/n).mean()
a1 = ((n1/N)*sample_mu1)
a2 = ((n2/N)*sample_mu2)
a3 = ((n3/N)*sample_mu3)
a4 = ((n4/N)*sample_mu4)
print(a1, " ", a2, " ", a3, " ", a4)
print(a1 + a2 + a3 + a4)
In [17]:
def snowballin(Graph,center,prev,depth,max_depth,outers):
if max_depth == depth:
return outers
if center in list(outers.nodes()):
return outers
else:
outers.add_node(center)
if prev != -1:
outers.add_edge(center,prev)
neighbors = list(netx.neighbors(Graph,center))
for neighbor in neighbors:
outers = snowballin(G,neighbor,center,depth+1,max_depth,outers)
return outers
In [18]:
connected_components = netx.connected_component_subgraphs(G)
sub_graphs = sorted(connected_components, key=len, reverse=True)
cc = sub_graphs[0]
node = list(cc.nodes())[0]
V = snowballin(cc,node,-1,0,4,netx.Graph())
In [19]:
plt.axis('off')
pos=netx.spring_layout(V)
netx.draw_networkx(V, pos=pos, with_labels=True)
plt.show()
In [20]:
node
Out[20]:
In [ ]: